German Tanks

Author

Oliver d’Pug

Simulate a Number of Battles

The function tanks simulates n_reps battles with n_obs serial numbers recorded for each battle. The argument n_tanks is the number of tanks that the Germans had. The argument fixedest is the expert’s best guess.

Code
tanks <- function (n_tanks = 47, n_obs = 5, n_reps = 100, fixedest = 50) 
{
    temp <- as.matrix(rep(n_tanks, n_reps))
    temp <- apply(temp, 1, sample, size = n_obs)
    temp <- t(apply(temp, 2, tanks.ests, fixedest))
    temp
}

Compute Estimates

The function tank.ests runs on a sample of observed serial numbers. Each of the vector values is the result of a student supplied estimator. These need to be changed to reflect the class’ ideas.

Code
tanks.ests <- function (x = stop("Argument 'x' is missing"), fixedest = 125) 
{
    n <- length(x)
    nodd <- n %% 2 == 1
    ests <- rep(NA, 16)
    xbar <- mean(x)
    xmedian <- median(x)
    xn <- max(x)
    xvar <- var(x)
    xstddev <- sqrt(xvar)
    x1 <- min(x)
    rng <- c(-1,1)%*%range(x)
    xsum <- sum(x)
    xnm1 <- sort(x)[n-1]
    
    ests[1] <- 2 * xbar
    ests[2] <- 2 * xmedian
    ests[3] <- xn
    ests[4] <- xbar + 2*xstddev
    ests[5] <- xbar + 3*xstddev
    ests[6] <- nodd * (xmedian + x[(n+1)/2 + 1]) + !nodd * (xmedian + x[n/2 + 1])
    ests[7] <- xn + xmedian
    #
      y = sort(x)
      z = c(NA,y[-n])
    ests[8] <- xn + mean(y-z, na.rm = TRUE)
      z = c(0,y[-n])
    ests[9] <- xn + mean(y-z)
      z = c(1,y[-n])
    ests[10] <- xn + mean(y-z)
    ests[11] <- (3*xn - x1)/2
    ests[12] <- (5*xn - xbar)/3
    ests[13] <- 3*xmedian + rng
    ests[14] <- xn
    ests[15] <- xn * ((n+1)/n) - 1
    ests[16] <- fixedest 
    names(ests) <- c("2*xbar", "2*m", "x[n]", "xbar+2s", "xbar+3s", "m + next largest",
                     "x[n] + m", "x[n]+mean(x -lag(x))", "x[n]+mean(x -lag(x) w/ 0)", 
                     "x[n]+mean(x -lag(x) w/ 1)", "(3x[n]-x[1])/2", "(5*x[n]-xbar)/3",
                     "3m + range(x)", "x[n]", "UMVUE", fixedest)
    ests   
    
    #ests[1] <- xn
    #ests[2] <- 2 * xbar
    #ests[3] <- xn + x1
    #ests[4] <- xn + rng/2
    #ests[5] <- xbar + xstddev
    #ests[6] <- 2 * xmedian
    #ests[7] <- xn + xnm1
    #ests[8] <- 2*xn - x1
    #ests[9] <- xsum
    #ests[10] <- xn * ((n+1)/n) - 1
    #ests[11] <- fixedest
    #names(ests) <- c("x[n]","2*xbar", "x[n]+x[1]", "x[n]+R/2", "xbar+s", "2*m", 
    #                 "x[n]+x[n-1]","2x[n]-x[1]","sum(x)", "UMVUE",fixedest)
    #ests
}

Compute Descriptive Statistics

The function tanks.descriptives computes descriptive statistics for each of the estimators in tanks.ests.

Code
tanks.descriptives <- function (n = 47, obs = 5, reps = 100, fixedest = 50) 
{
    temp <- tanks(n, obs, reps, fixedest)
    means <- apply(temp, 2, mean)
    stddevs <- sqrt(apply(temp, 2, var))
    medians <- apply(temp, 2, median)
    bias <- means - n
    mse <- bias^2 + stddevs^2
    t(cbind(means, stddevs, medians, bias, mse))
}

Plot Estimates

The individual estimates computed for the samples from each battle can be plotted. This allows us to compare location and dispersion statistics — center and spread. tanks.plots2 is intended to be an “improved” version of tanks.plots. Both plots use traditional lattice boxplots and there is a ggplot plot as well.

Code
### Load the lattice package
p_load(lattice)

tanks.plots <- function (n = 47, obs = 5, reps = 100, fixedest = 50) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    bwplot(factor(temp[, 2], labels = tanknames) ~ 
        temp[, 1], xlab = "Number of Tanks", 
        ylab = "Estimator")
}

tanks.plots2 <- function (n = 47, obs = 5, reps = 100, fixedest = 50) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    bwplot(factor(temp[, 2], labels = tanknames) ~ 
        temp[, 1], xlab = "Number of Tanks", 
        ylab = "Estimator", panel = function (x , 
            y , vref = n, ... ) 
        {
            panel.bwplot(x, y)
            panel.abline(v = vref, lty = 2)
        }, vref = n)
}

p_load(ggplot2)
p_load(tidyverse)
tanks.plots3 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    temp <- as_tibble(temp)
    names(temp) <- c("Estimate", "Estimator")
    temp$Estimator <- factor(temp$Estimator)
    ggplot(temp, aes(x=Estimator, y=Estimate)) +
      geom_boxplot(alpha=0.7) +
      stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
      theme(legend.position="none") +
      scale_fill_brewer(palette="Set1") +
      geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
      coord_flip()
}

tanks.plots4 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    temp <- melt(temp)
    names(temp) <- c("RowID","Estimator","Estimate")
    ggplot(temp, aes(x=reorder(Estimator, Estimate, FUN=mean), y=Estimate)) +
      geom_boxplot(alpha=0.7) +
      stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
      theme(legend.position="none") +
      xlab("Estimator") +
      scale_fill_brewer(palette="Set1") +
      geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
      coord_flip()
}

tanks.plots5 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    temp <- melt(temp)
    names(temp) <- c("RowID","Estimator","Estimate")
    ggplot(temp, aes(x=reorder(Estimator, Estimate, FUN=mean), y=Estimate)) +
      geom_violin(alpha=0.7) +
      stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
      theme(legend.position="none") +
      xlab("Estimator") +
      scale_fill_brewer(palette="Set1") +
      geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
      coord_flip()
}

Compare our Estimators

The class’ estimators can be compared using the functions defined above.

Code
  ### Compute descriptive stats
  t(tanks.descriptives(n = 47, obs = 5, reps = 10000, fixedest = 50))
                              means   stddevs   medians      bias        mse
2*xbar                     48.14516 11.571031  48.00000  1.145160  135.20014
2*m                        48.10140 17.023910  48.00000  1.101400  291.02658
x[n]                       40.09400  6.225531  42.00000 -6.906000   86.45008
xbar+2s                    50.67561  8.970541  51.65893  3.675611   93.98072
xbar+3s                    63.97713 11.866761  65.09947 16.977127  429.04285
m + next largest           48.15770 18.681251  48.00000  1.157700  350.32940
x[n] + m                   64.14470 12.548231  65.00000 17.144700  451.39885
x[n]+mean(x -lag(x))       48.10440  7.624107  50.00000  1.104400   59.34671
x[n]+mean(x -lag(x) w/ 0)  48.11280  7.470638  50.40000  1.112800   57.04875
x[n]+mean(x -lag(x) w/ 1)  47.91280  7.470638  50.20000  0.912800   56.64363
(3x[n]-x[1])/2             56.11480  9.244018  58.00000  9.114800  168.53145
(5*x[n]-xbar)/3            58.79914  9.245939  61.13333 11.799140  224.70709
3m + range(x)             104.19370 26.563252 105.00000 57.193700 3976.72566
x[n]                       40.09400  6.225531  42.00000 -6.906000   86.45008
UMVUE                      47.11280  7.470638  49.40000  0.112800   55.82315
50                         50.00000  0.000000  50.00000  3.000000    9.00000
Code
  ### Plot the estimates from each of the estimators
  tanks.plots(n = 47, obs = 5, reps = 10000, fixedest = 50)

Code
  ### Plot the estimates from each of the estimators
  tanks.plots2(n = 47, obs = 5, reps = 10000, fixedest = 50)

Code
  ### Plot the estimates from each of the estimators
  tanks.plots4(n = 47, obs = 5, reps = 10000, fixedest = 50)

Code
  ### Plot the estimates from each of the estimators
  tanks.plots5(n = 47, obs = 5, reps = 10000, fixedest = 50)

Note that \widehat{N} = X_{(n)} \frac{n+1}{n} - 1 is UMVUE for N when the X_i are i.i.d. DU(1,N).